library(readxl)
# Load the marketing-campaign workbook into `df`.
# NOTE(review): the path is hard-coded under "~/Desktop" and two folder names
# end with a space ("Data Analytics /", "Team Assignment /") — confirm these
# match the directories on disk before running this elsewhere.
df <- read_excel("~/Desktop/Study/Courses/Data Analytics /Team Assignment /datasets_marketing_campaign_SF.xlsx")
# Return `x` with every row that contains at least one missing value removed.
#
# x: a data frame (or any object that na.omit() accepts).
# Returns the same kind of object as na.omit(x).
clean.df <- function(x) {
  stats::na.omit(x)
}
# Drop incomplete rows before deriving any new columns.
df <- clean.df(df)
#Data massaging
##Assigning binary value for Marital_Status variable
## Codes: Absurd, Alone, Divorced, Single, Widow, YOLO -> 0;
##        Together, Married -> 1.
## A named lookup matches each label exactly and in a single pass; the earlier
## chain of gsub() calls did regex substring replacement once per label.
## Any label that is not listed here becomes NA.
marital_codes <- c(
  Absurd = 0, Alone = 0, Divorced = 0, Single = 0, Widow = 0, YOLO = 0,
  Together = 1, Married = 1
)
df$marital_binary <- unname(marital_codes[as.character(df$Marital_Status)])
##Converting Education into numeric: The higher the education the higher the assigned value
## Codes: Basic = 1, 2n Cycle = 2, Graduation = 3, Master = 4, PhD = 5.
## Exact-match lookup instead of sequential gsub() substitution; labels that
## are not listed become NA.
## The column name `eduction_level` (sic) is kept because later code reads it.
education_codes <- c(
  "Basic" = 1, "2n Cycle" = 2, "Graduation" = 3, "Master" = 4, "PhD" = 5
)
df$eduction_level <- unname(education_codes[as.character(df$Education)])
##Data massaging & initializing new columns
## This block used to run the US / total_sales / USSales / ROTW / sub_us
## statements twice and to pre-fill columns with placeholder values that were
## overwritten immediately; each column is now derived exactly once.

## US indicator: 1 when Country is exactly "US", 0 for every other country.
## (Direct comparison replaces the gsub() chain that rewrote "AUS", "US",
## remaining letters and punctuation into digits.)
df$US <- as.numeric(df$Country == "US")

## Number of purchases over the web, store and catalog channels.
df$total_sales <- df$NumWebPurchases + df$NumStorePurchases + df$NumCatalogPurchases

## Split total_sales by market: USSales is non-zero only for US rows,
## ROTW (rest of the world) is non-zero only for non-US rows.
df$USSales <- df$US * df$total_sales
df$ROTW <- (1 - df$US) * df$total_sales

## Total purchases per value of the US indicator.
sub_us <- aggregate(total_sales ~ US, data = df, FUN = sum, na.rm = TRUE)

## Age in the sign-up year: year of Dt_Customer minus Year_Birth.
## age_at_purchase is assigned before Dt_Customer_Converted so the column
## order of `df` is the same as before.
signup_year <- as.numeric(format(as.Date(df$Dt_Customer), format = "%Y"))
df$age_at_purchase <- signup_year - df$Year_Birth
df$Dt_Customer_Converted <- signup_year
# Linear model of total_sales (all customers) on income, kids at home,
# per-category spend, deal purchases, campaign-5 acceptance and Response.
world_linear <- lm(
  total_sales ~ Income + Kidhome + MntWines + MntFruits + MntMeatProducts +
    MntFishProducts + MntSweetProducts + MntGoldProds + NumDealsPurchases +
    AcceptedCmp5 + Response,
  data = df
)
summary(world_linear)
##
## Call:
## lm(formula = total_sales ~ Income + Kidhome + MntWines + MntFruits +
## MntMeatProducts + MntFishProducts + MntSweetProducts + MntGoldProds +
## NumDealsPurchases + AcceptedCmp5 + Response, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.3408 -1.6577 -0.3319 1.8370 14.8419
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.912e+00 2.399e-01 20.473 < 2e-16 ***
## Income 3.965e-05 4.012e-06 9.883 < 2e-16 ***
## Kidhome -2.406e+00 1.750e-01 -13.748 < 2e-16 ***
## MntWines 9.105e-03 3.285e-04 27.717 < 2e-16 ***
## MntFruits 1.147e-02 2.581e-03 4.446 9.20e-06 ***
## MntMeatProducts 3.946e-03 5.011e-04 7.874 5.33e-15 ***
## MntFishProducts 8.816e-03 1.945e-03 4.534 6.10e-06 ***
## MntSweetProducts 1.860e-02 2.465e-03 7.543 6.66e-14 ***
## MntGoldProds 1.476e-02 1.703e-03 8.668 < 2e-16 ***
## NumDealsPurchases 7.060e-01 4.192e-02 16.839 < 2e-16 ***
## AcceptedCmp5 -2.300e+00 3.485e-01 -6.600 5.12e-11 ***
## Response -3.889e-01 2.238e-01 -1.738 0.0824 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.499 on 2204 degrees of freedom
## Multiple R-squared: 0.7653, Adjusted R-squared: 0.7641
## F-statistic: 653.2 on 11 and 2204 DF, p-value: < 2.2e-16
# Linear model of ROTW (total_sales for non-US rows, 0 for US rows) on the
# same spend predictors, with NumWebPurchases in place of Response.
# NOTE(review): NumWebPurchases is one of the addends of total_sales, so it is
# partly explaining itself here — confirm this predictor is intended.
ROTW_linear <- lm(
  ROTW ~ Income + Kidhome + MntWines + MntFruits + MntMeatProducts +
    MntFishProducts + MntSweetProducts + MntGoldProds + NumDealsPurchases +
    AcceptedCmp5 + NumWebPurchases,
  data = df
)
summary(ROTW_linear)
##
## Call:
## lm(formula = ROTW ~ Income + Kidhome + MntWines + MntFruits +
## MntMeatProducts + MntFishProducts + MntSweetProducts + MntGoldProds +
## NumDealsPurchases + AcceptedCmp5 + NumWebPurchases, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.0552 -0.9634 0.1297 1.5645 13.6198
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.106e+00 2.905e-01 10.694 < 2e-16 ***
## Income 2.857e-05 4.722e-06 6.049 1.71e-09 ***
## Kidhome -1.483e+00 2.076e-01 -7.145 1.22e-12 ***
## MntWines 5.748e-03 4.121e-04 13.948 < 2e-16 ***
## MntFruits 7.260e-03 3.035e-03 2.392 0.016825 *
## MntMeatProducts 3.909e-03 5.883e-04 6.645 3.81e-11 ***
## MntFishProducts 7.902e-03 2.285e-03 3.458 0.000554 ***
## MntSweetProducts 1.251e-02 2.916e-03 4.290 1.86e-05 ***
## MntGoldProds 6.876e-03 2.028e-03 3.391 0.000709 ***
## NumDealsPurchases 1.842e-01 5.164e-02 3.568 0.000368 ***
## AcceptedCmp5 -1.125e+00 4.007e-01 -2.807 0.005047 **
## NumWebPurchases 9.973e-01 4.278e-02 23.312 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.112 on 2204 degrees of freedom
## Multiple R-squared: 0.7028, Adjusted R-squared: 0.7013
## F-statistic: 473.8 on 11 and 2204 DF, p-value: < 2.2e-16
# Linear model of USSales (total_sales for US rows, 0 for every other row) on
# meat spend, deal purchases and Response.
# The stray second `+` before Response has been removed; the model terms are
# unchanged.
# NOTE(review): the model is fitted on all rows of `df`, so every non-US
# customer enters with USSales = 0 — confirm that is intended rather than
# fitting on the US subset only.
US_linear <- lm(
  USSales ~ MntMeatProducts + NumDealsPurchases + Response,
  data = df
)
summary(US_linear)
##
## Call:
## lm(formula = USSales ~ MntMeatProducts + NumDealsPurchases +
## +Response, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1170 -0.8883 -0.5482 -0.2681 28.0924
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0799928 0.1267622 0.631 0.5281
## MntMeatProducts 0.0017656 0.0003225 5.474 4.89e-08 ***
## NumDealsPurchases 0.1495908 0.0365244 4.096 4.36e-05 ***
## Response -0.4290861 0.2008926 -2.136 0.0328 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.281 on 2212 degrees of freedom
## Multiple R-squared: 0.01883, Adjusted R-squared: 0.0175
## F-statistic: 14.15 on 3 and 2212 DF, p-value: 3.878e-09
The multiple regression on total sales has an adjusted R-squared of 76%. Kidhome and AcceptedCmp5 have a negative relation with total sales. ROTW has a higher significance value, with a positive relation, for MntFruits and MntFishProducts compared to the rest of the products (MntSweetProducts, MntWines, MntMeatProducts, MntGoldProds). NumDealsPurchases, with an estimate of 0.7, has a strong impact on total sales. ROTW also has a strong relation with NumDealsPurchases, with an estimate of 0.5; together with the R-squared value of 62%, this indicates that both ROTW and total sales are strongly related to NumDealsPurchases.
The multiple R-squared value shows a 62% relation between ROTW (rest of the world) and the purchases of products. Kidhome and AcceptedCmp5 show negative coefficients. For every additional kid at home, ROTW purchases decrease by 2.18. NumDealsPurchases is strongly related to ROTW, with an estimated increase of 0.54 in purchases. Because ROTW shows a strong relation with NumDealsPurchases, the CMO should offer deals to customers to increase purchases. In contrast, the US market has an R-squared of 1.7% with purchases. The US market relation is positive, with an estimated increase of 0.14 in purchases.
#H0 : mu1 - mu2 <= 0
#Ha : mu1 - mu2 > 0
#Calculated the gold purchase mean
gold_mean <- mean(df$MntGoldProds)
df$date <- as.Date(df$Dt_Customer)
#Convert gold purchase to binary: 1 when MntGoldProds is above the mean, else 0.
#A single vectorised comparison replaces the former `df$gold_binary <- c()`
#no-op plus the row-by-row loop that filled the column with "1"/"0" strings
#and converted them to numeric afterwards. It also works when df has 0 rows.
df$gold_binary <- as.numeric(df$MntGoldProds > gold_mean)
####Interaction store purchase and gold_binary
#Creating a interaction between number of store purchases and gold_binary
df$store_x_gold <- df$NumStorePurchases * df$gold_binary
###Subsetting into two samples: customers purchasing gold more than the mean(gold)
#over the last 2 years and customers who don't.
#which() is kept so that a missing flag would drop the row instead of
#producing an NA row.
df_gold <- df[which(df$gold_binary == 1), ]
#Subsetting gold_binary = 0 and store to its own data frame
df_gold0 <- df[which(df$gold_binary != 1), ]
#Statistical calculation (two-sample t-test with unequal variances)
#Sample 1: customers with gold purchases greater than the mean of gold (df_gold).
#mean1 / s1 / n1 = mean, standard deviation and size of NumStorePurchases.
n1 <- nrow(df_gold)
mean1 <- mean(df_gold$NumStorePurchases)
s1 <- sd(df_gold$NumStorePurchases)
#Sample 2: the remaining customers (df_gold0); mean2 / s2 / n2 as above.
n2 <- nrow(df_gold0)
mean2 <- mean(df_gold0$NumStorePurchases)
s2 <- sd(df_gold0$NumStorePurchases)
##Calculating t-statistic: difference in means over its standard error
t <- (mean1 - mean2) / sqrt(s1^2 / n1 + s2^2 / n2)
##Since both samples have different variances, here is the calculation for the degree of freedom:
deg_f <- (s1^2 / n1 + s2^2 / n2)^2 /
  ((1 / (n1 - 1)) * (s1^2 / n1)^2 + (1 / (n2 - 1)) * (s2^2 / n2)^2)
##degree of freedom is greater than 1000
##--> Conclusion: with the DoF of 1326, our t-statistic = 21 so we can reject the Ho.
##Confounding Variables: Income since people with higher income tend to expense more on both in-store purchases and gold purchases
#Scatter plot of NumStorePurchases against MntGoldProds, coloured by group,
#with a linear fit per group.
library(plotly)
#Label the two gold_binary groups (1 / 0) for the plot legend.
df$personality_type <- ifelse(
  df$gold_binary == 1,
  "Convervative",
  "Less Convervative"
)
org_scatter <- ggplot(
  data = df,
  aes(x = MntGoldProds, y = NumStorePurchases, color = personality_type)
) +
  geom_point() +
  geom_smooth(method = "lm")
#ggplotly(org_scatter)
#Summary table of the inputs to the store-purchases t-test.
names_test <- c("Mean 1", "Standard Deviation 1", "Sample Size 1", "Mean 2", "Standard Deviation 2", "Sample Size 2", "Degrees of Freedom")
test_values <- c(mean1, s1, n1, mean2, s2, n2, deg_f)
df_qc <- data.frame(Statistics = names_test, Values = test_values)
The table below includes the statistics derived from our two samples, which are necessary for our two-sample t-test for a difference in means.
## Statistics Values
## 1 Mean 1 7.767575
## 2 Standard Deviation 1 3.008214
## 3 Sample Size 1 697.000000
## 4 Mean 2 4.898618
## 5 Standard Deviation 2 2.946200
## 6 Sample Size 2 1519.000000
## 7 Degrees of Freedom 1325.738994
According to our test, with a DoF of 1326 and a t-statistic of 20.98, we are 95% confident that there is a relation between the amount spent on gold and the number of in-store purchases.
Furthermore, the multiple regression analysis points to similar results, which consolidates our previous findings to a greater extent.
## `geom_smooth()` using formula 'y ~ x'
Hypothesis A: Those who like to purchase gold products tend to look at the gold chart on the web site. Given that, these people prefer to buy any product online.
Hypothesis B: Those who are rich like to enjoy their days off with wine and meat.
Hypothesis C: People who are more educated prefer to spend on sweets.
#Hypothesis A: web purchases of above-average gold buyers vs. the rest
#H0: mean online purchases gold <= mean of online purchases
#Ha: mean online purchases gold > mean of online purchases
#Sample 1 = df_gold, sample 2 = df_gold0; mean, sd and size of NumWebPurchases.
n1_hypA <- nrow(df_gold)
mu1_hypA <- mean(df_gold$NumWebPurchases)
s1_hypA <- sd(df_gold$NumWebPurchases)
n2_hypA <- nrow(df_gold0)
mu2_hypA <- mean(df_gold0$NumWebPurchases)
s2_hypA <- sd(df_gold0$NumWebPurchases)
#t statistic: difference in means over its standard error
t_hypeA <- (mu1_hypA - mu2_hypA) /
  sqrt(s1_hypA^2 / n1_hypA + s2_hypA^2 / n2_hypA)
##Since both samples have different variances, here is the calculation for the degree of freedom:
deg_f_hypeA <- (s1_hypA^2 / n1_hypA + s2_hypA^2 / n2_hypA)^2 /
  ((1 / (n1_hypA - 1)) * (s1_hypA^2 / n1_hypA)^2 +
     (1 / (n2_hypA - 1)) * (s2_hypA^2 / n2_hypA)^2)
According to our test, with a DoF of 1357 and a t-statistic of 19.67, we are 95% confident that there is a relation between the amount spent on gold and the number of online purchases.
#H0: mean of amount spent on meat and wine of the rich customers <= mean of amount spent on meat and wine of the poor customers
#Ha: mean of amount spent on meat and wine of the rich customers > mean of amount spent on meat and wine of the poor customers
mean_income <- mean(df$Income)
#Flag customers whose Income is above the mean: "1" if above, "0" otherwise.
#One vectorised ifelse() replaces the `df$income_binary <- c()` no-op and the
#row-by-row loop, which wrote into a column that did not exist yet (source of
#the "Unknown or uninitialised column" warning). The values stay character
#("1"/"0"), as the loop produced them.
df$income_binary <- ifelse(df$Income > mean_income, "1", "0")
## Warning: Unknown or uninitialised column: `income_binary`.
#Hypothesis B: combined wine + meat spend of above-average-income customers
#(df_rich) vs. the remaining customers (df_poor)
df$sum_wine_meat <- df$MntWines + df$MntMeatProducts
df_rich <- df[which(df$income_binary == "1"), ]
df_poor <- df[which(df$income_binary == "0"), ]
#Sample 1 = df_rich, sample 2 = df_poor; mean, sd and size of sum_wine_meat.
n1_hypB <- nrow(df_rich)
mu1_hypB <- mean(df_rich$sum_wine_meat)
s1_hypB <- sd(df_rich$sum_wine_meat)
n2_hypB <- nrow(df_poor)
mu2_hypB <- mean(df_poor$sum_wine_meat)
s2_hypB <- sd(df_poor$sum_wine_meat)
#t statistic: difference in means over its standard error
t_hypeB <- (mu1_hypB - mu2_hypB) /
  sqrt(s1_hypB^2 / n1_hypB + s2_hypB^2 / n2_hypB)
##Since both samples have different variances, here is the calculation for the degree of freedom:
deg_f_hypeB <- (s1_hypB^2 / n1_hypB + s2_hypB^2 / n2_hypB)^2 /
  ((1 / (n1_hypB - 1)) * (s1_hypB^2 / n1_hypB)^2 +
     (1 / (n2_hypB - 1)) * (s2_hypB^2 / n2_hypB)^2)
According to our test, with a DoF of 1349 and a t-statistic of 49.92, we are 95% confident that rich people (income above the average income) spend more on wine and meat products than poor people.
#H0: mean of amount spent on sweet products of the more educated customers <= mean of amount spent on sweet products of the less educated customers
#Ha: mean of amount spent on sweet products of the more educated customers > mean of amount spent on sweet products of the less educated customers
mean_edu <- mean(df$eduction_level)
#Flag customers whose eduction_level is above the mean: "1" if above, "0"
#otherwise. One vectorised ifelse() replaces the `df$edu_binary <- c()` no-op
#and the row-by-row loop, which wrote into a column that did not exist yet
#(source of the "Unknown or uninitialised column" warning). The values stay
#character ("1"/"0"), as the loop produced them.
df$edu_binary <- ifelse(df$eduction_level > mean_edu, "1", "0")
## Warning: Unknown or uninitialised column: `edu_binary`.
#Hypothesis C: sweet-product spend of customers with above-average
#eduction_level (df_smart) vs. the remaining customers (df_not_really_smart)
df_smart <- df[which(df$edu_binary == "1"), ]
df_not_really_smart <- df[which(df$edu_binary == "0"), ]
#Sample 1 = df_smart, sample 2 = df_not_really_smart; mean, sd and size of
#MntSweetProducts.
n1_hypC <- nrow(df_smart)
mu1_hypC <- mean(df_smart$MntSweetProducts)
s1_hypC <- sd(df_smart$MntSweetProducts)
n2_hypC <- nrow(df_not_really_smart)
mu2_hypC <- mean(df_not_really_smart$MntSweetProducts)
s2_hypC <- sd(df_not_really_smart$MntSweetProducts)
#t statistic: difference in means over its standard error
t_hypeC <- (mu1_hypC - mu2_hypC) /
  sqrt(s1_hypC^2 / n1_hypC + s2_hypC^2 / n2_hypC)
##Since both samples have different variances, here is the calculation for the degree of freedom:
deg_f_hypeC <- (s1_hypC^2 / n1_hypC + s2_hypC^2 / n2_hypC)^2 /
  ((1 / (n1_hypC - 1)) * (s1_hypC^2 / n1_hypC)^2 +
     (1 / (n2_hypC - 1)) * (s2_hypC^2 / n2_hypC)^2)
According to our test, with a DoF of 2102 and a t-statistic of -6.24, we cannot reject the null hypothesis at the 95% confidence level. Therefore, we do not need to run promotions on sweet products for more educated people.
According to our analysis, we are 90% confident that income, number of kids in the household, number of teenagers in the household, amount spent on wine, meat, sweets, and gold, number of purchases made with a discount and through the catalog, number of purchases made directly in stores, monthly number of web visits, campaign 2 and campaign 5 seem to drive web purchases.
##
## Call:
## lm(formula = NumWebPurchases ~ Income + Kidhome + Teenhome +
## Recency + MntWines + MntFruits + MntMeatProducts + MntFishProducts +
## MntSweetProducts + MntGoldProds + NumDealsPurchases + NumCatalogPurchases +
## NumStorePurchases + NumWebVisitsMonth + AcceptedCmp3 + AcceptedCmp4 +
## AcceptedCmp5 + AcceptedCmp1 + AcceptedCmp2 + AcceptedCmp1 +
## Complain + Z_CostContact + Z_Revenue + Response, data = training_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.5659 -0.9768 -0.1364 0.9265 22.7602
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.012e+00 2.576e-01 -3.930 8.82e-05 ***
## Income 1.071e-05 2.470e-06 4.337 1.52e-05 ***
## Kidhome -7.297e-01 1.136e-01 -6.425 1.70e-10 ***
## Teenhome 3.491e-01 9.865e-02 3.539 0.000412 ***
## Recency 1.719e-03 1.583e-03 1.086 0.277673
## MntWines 2.633e-03 2.352e-04 11.192 < 2e-16 ***
## MntFruits 2.112e-03 1.576e-03 1.340 0.180543
## MntMeatProducts -6.177e-04 3.452e-04 -1.790 0.073694 .
## MntFishProducts 1.248e-03 1.188e-03 1.050 0.293643
## MntSweetProducts 4.095e-03 1.531e-03 2.675 0.007553 **
## MntGoldProds 7.671e-03 1.040e-03 7.377 2.48e-13 ***
## NumDealsPurchases 1.866e-01 2.955e-02 6.314 3.44e-10 ***
## NumCatalogPurchases 2.973e-02 2.651e-02 1.122 0.262118
## NumStorePurchases 1.875e-01 2.096e-02 8.945 < 2e-16 ***
## NumWebVisitsMonth 3.313e-01 2.727e-02 12.149 < 2e-16 ***
## AcceptedCmp3 1.361e-02 1.834e-01 0.074 0.940861
## AcceptedCmp4 -1.643e-01 1.967e-01 -0.835 0.403692
## AcceptedCmp5 -4.348e-01 2.185e-01 -1.990 0.046769 *
## AcceptedCmp1 4.570e-02 2.087e-01 0.219 0.826713
## AcceptedCmp2 -1.609e+00 3.997e-01 -4.025 5.94e-05 ***
## Complain 3.629e-02 4.608e-01 0.079 0.937238
## Z_CostContact NA NA NA NA
## Z_Revenue NA NA NA NA
## Response 3.177e-01 1.476e-01 2.153 0.031477 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.884 on 1750 degrees of freedom
## Multiple R-squared: 0.5158, Adjusted R-squared: 0.51
## F-statistic: 88.78 on 21 and 1750 DF, p-value: < 2.2e-16
The scatter plot between numbers of web purchases and number of monthly web visits indicates a moderate relation between these two variables.
##
## Pearson's Chi-squared test
##
## data: df$NumWebPurchases and df$NumWebVisitsMonth
## X-squared = 1249.8, df = 210, p-value < 2.2e-16
Furthermore, according to the Chi-Squared test, there is a relationship between web purchases and the number of web visits per month.
## [1] 15.15094
Observed Frequencies Table
## Country AcceptedCmp1 AcceptedCmp2 AcceptedCmp3 AcceptedCmp4 AcceptedCmp5
## 1 AUS 7 0 9 6 12
## 2 CA 18 6 18 24 21
## 3 GER 7 2 10 11 8
## 4 IND 7 2 13 10 6
## 5 ME 0 0 1 0 0
## 6 SA 20 4 21 20 21
## 7 SP 76 16 83 87 89
## 8 US 7 0 8 6 5
Expected Frequencies Table
## Country AcceptedCmp1 AcceptedCmp2 AcceptedCmp3 AcceptedCmp4 AcceptedCmp5
## 1 AUS 7.304085 1.54311649 8.3842663 8.4357035 8.3328290
## 2 CA 18.689864 3.94856278 21.4538578 21.5854766 21.3222390
## 3 GER 8.163389 1.72465961 9.3706505 9.4281392 9.3131619
## 4 IND 8.163389 1.72465961 9.3706505 9.4281392 9.3131619
## 5 ME 0.214826 0.04538578 0.2465961 0.2481089 0.2450832
## 6 SA 18.475038 3.90317700 21.2072617 21.3373676 21.0771558
## 7 SP 75.403933 15.93040847 86.5552194 87.0862330 86.0242057
## 8 US 5.585477 1.18003026 6.4114977 6.4508321 6.3721634
According to our test, we cannot reject the null hypothesis, since the Chi-Squared statistic is 15.1509352 with 28 degrees of freedom, which is below the Chi-Squared critical value of 41.337. Therefore, the two variables are independent of each other.
According to our data, the average amount of fruits purchased is 26.3560469.
##
## Call:
## lm(formula = MntMeatProducts + MntFishProducts ~ age_at_purchase +
## eduction_level + marital_binary + US + AcceptedCmp1 + AcceptedCmp2 +
## AcceptedCmp3 + AcceptedCmp4 + AcceptedCmp5 + Complain + Response,
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -631.66 -147.94 -92.94 84.83 1569.31
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 137.4115 24.7592 5.550 3.20e-08 ***
## age_at_purchase 0.8385 0.4234 1.980 0.04781 *
## eduction_level -0.4946 5.0829 -0.097 0.92249
## marital_binary -8.4262 10.5606 -0.798 0.42502
## US 31.5675 23.2469 1.358 0.17463
## AcceptedCmp1 219.4270 22.9106 9.578 < 2e-16 ***
## AcceptedCmp2 -127.5156 45.8293 -2.782 0.00544 **
## AcceptedCmp3 -53.2672 19.9562 -2.669 0.00766 **
## AcceptedCmp4 -66.0198 21.0636 -3.134 0.00175 **
## AcceptedCmp5 285.2607 22.1809 12.861 < 2e-16 ***
## Complain -52.3357 51.4492 -1.017 0.30916
## Response 79.1251 15.8195 5.002 6.13e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 234 on 2204 degrees of freedom
## Multiple R-squared: 0.1915, Adjusted R-squared: 0.1874
## F-statistic: 47.44 on 11 and 2204 DF, p-value: < 2.2e-16
According to our regression analysis, the significant qualitative factors are age at purchase, campaign acceptance and all of our campaigns.
According to our data, the customers buying the most amount of fish are the people with the 2n Cycle education level at the average of 48.04
## MntFruits MntMeatProducts MntFishProducts MntSweetProducts
## 1 20159 110127 27418 21414
According to our data, the households with teenagers spent the most on meat products, at 110127.
According to our data, Campaign 4 is the most accepted campaign with 164 responses accepted; however, both Campaign 3 and 5 are right behind with 163 and 162 accepted responses, respectively.
According to the collected samples, there are 21 complaints out of 2216 observations. Out of the 21 complaining observations:
67% are customers with Graduation level. 62% are customers with married or together status. 80.95% are customers with children at home. 66.67% of the complaints are from Spain. The average age of customers at the dates of their purchases is 44 years old.
The average current age of the customers is 52 years old.